 /*
Purpose: Clean Census data and restrict to NAEP cohorts
*/

use "data/raw/usa_00011", clear

// Person-level income allocation flag.
// qincome = 1 when at least one of the income quality flags listed below is
// non-zero and non-missing, and 0 otherwise.
// missing() is used instead of "!= ." on purpose: in Stata ".a"-".z" are
// distinct from ".", so "x != ." is TRUE for extended missing values and
// would have marked such rows as allocated.
gen qincome = 0
foreach q of varlist qincbus qincfarm qincinvs qincothe qincreti qincss qinctot qincwage qincwelf {
    quietly replace qincome = 1 if `q' != 0 & !missing(`q')
}

// sanity checks against qinctot where it exists:
// qinctot==0 must imply no flag at all, qinctot==4 must imply flagged
assert qincome==0 if qinctot==0
assert qincome==1 if qinctot==4

tab qincome //23% allocated

// Roll the person-level flag up to households and families:
// a group is flagged when at least one of its members is flagged.
bysort year serial: egen qhhincome = max(qincome)
bysort year serial famunit: egen qftotinc = max(qincome)

// Share of households flagged. The flag is constant within a household,
// so one row per (year, serial) is enough; which row is kept is irrelevant.
// preserve/restore leaves the person-level data untouched.
preserve
bysort year serial: keep if _n == 1
tab qhhincome //39% of HH's allocated
restore

// Same tabulation at the family level: one row per (year, serial, famunit).
preserve
bysort year serial famunit: keep if _n == 1
tab qftotinc //39% of families allocated
restore

// Restrict to the birth-year cohorts retained for each census year
// (one condition per census year; a row is kept if any of them holds).
keep if (year == 1990 & inlist(birthyr, 1977, 1978, 1979, 1983, 1985, 1987, 1989)) ///
      | (year == 2000 & (birthyr == 1987 | inrange(birthyr, 1989, 1999)))          ///
      | (year == 2009 & inrange(birthyr, 1995, 2002))

// write the restricted sample, overwriting any previous version
save "data/clean/NAEP cohorts", replace